Set Up Libraries

library(forecast)
library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)
library(ggplot2)
theme_set(theme_bw())
library(weatherGen)
data(climate)

# set RNG seed for reproducibility
set.seed(2345)

Load Monthly Climate Data

Get list of files with each file containing a monthly climate dataset for one grid point.

DATA_DIR <- '/Users/jeff/Projects/UMass/virtue/data/mon/east'
files <- list.files(DATA_DIR) %>%
  str_split('_') %>% 
  do.call(rbind, .) %>%
  as.data.frame(stringsAsFactors=FALSE) %>%
  mutate(FILE=paste(V1, V2, V3, sep="_")) %>%
  select(FILE, LAT=V2, LON=V3) %>%
  mutate(LAT=as.numeric(LAT), LON=as.numeric(LON))

Here is a map of the climate data grid points.

library(ggmap)
map <- get_map(location=c(lon=mean(range(files$LON)), lat=mean(range(files$LAT))),
               zoom=4, maptype="satellite", color='bw')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=36.625,-78.8125&zoom=4&size=%20640x640&scale=%202&maptype=satellite&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
ggmap(map, darken=c(0.25, "white"), extent="device") +
  geom_point(aes(x=LON, y=LAT), data=files, color='red', size=1)

plot of chunk map east

Zoomed into new england. The green points will be used to compute a regional average.

map <- get_map(location=c(lon=-72, lat=42),
               zoom=7, maptype="satellite", color='bw')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=42,-72&zoom=7&size=%20640x640&scale=%202&maptype=satellite&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
ggmap(map, darken=c(0.25, "white"), extent="device") +
  geom_point(aes(x=LON, y=LAT, color=SELECT), 
             data=mutate(files, SELECT=(LAT>=41 & LAT<=43 & LON>=-74 & LON<=-70)), size=2) +
  geom_vline(aes(xintercept=LON), color='green', linetype=2, 
             data=data.frame(LON=seq(-74, -70))) +
  geom_hline(aes(yintercept=LAT), color='green', linetype=2, 
             data=data.frame(LAT=seq(41, 43))) +
  scale_color_manual(values=c("TRUE"="green", "FALSE"="red"), guide=FALSE)

plot of chunk map new england

Extract the points within the selected region.

clim.raw <- filter(files, LAT>=41, LAT<=43, LON>=-74, LON<=-70) %>%
  apply(1, function(f) {
    x <- read.table(file.path(DATA_DIR, f[['FILE']]))
    names(x) <- c("YEAR", "MONTH", "PRCP","TMAX","TMIN","TAVG")
    x$LAT <- f['LAT']
    x$LON <- f['LON']
    return(x)
  }) %>%
  do.call(rbind, .)

clim.mon <- gather(clim.raw, VAR, VALUE, PRCP:TAVG) %>%
  mutate(VALUE=as.numeric(VALUE))

clim.yr <- group_by(clim.raw, YEAR, LAT, LON) %>%
  summarise(PRCP=sum(PRCP),
            TMAX=mean(TMAX),
            TMIN=mean(TMIN),
            TAVG=mean(TAVG)) %>%
  gather(VAR, VALUE, PRCP:TAVG)

Regional Average

Compute the regional average monthly value of each climate variable.

clim.reg.mon <- clim.mon %>%
  group_by(YEAR, MONTH, VAR) %>%
  summarise(MEAN=mean(VALUE),
            SD=sd(VALUE))

mutate(clim.reg.mon, DATE=ymd(paste(YEAR,MONTH,1,sep='-'))) %>%
  ggplot(aes(DATE, MEAN)) +
  geom_ribbon(aes(ymin=MEAN-SD, ymax=MEAN+SD), fill='grey80') +
  geom_line(color='red') +
  facet_wrap(~VAR, ncol=1, scales='free_y') +
  labs(x="Month/Year", y="Mean +/- StDev")

plot of chunk climate reg mon

Compute the regional average annual value of each climate variable.

clim.reg.yr <-  clim.yr %>%
  group_by(VAR, YEAR) %>%
  summarise(MEAN=mean(VALUE),
            SD=sd(VALUE))

ggplot(clim.reg.yr, aes(YEAR, MEAN)) +
  geom_ribbon(aes(ymin=MEAN-SD, ymax=MEAN+SD), fill='grey80') +
  geom_line(color='red') +
  facet_wrap(~VAR, ncol=1, scales='free_y') +
  labs(x="Year", y="Mean +/- StDev")

plot of chunk climate reg yr

Convert the regional average datasets wide format.

clim.reg.mon <- select(clim.reg.mon, -SD) %>%
  spread(VAR, MEAN)
head(clim.reg.mon)
## Source: local data frame [6 x 6]
## 
##   YEAR MONTH   PRCP   TMAX   TMIN  TAVG
## 1 1949     1 117.52  4.243 -4.818 7.308
## 2 1949     2  77.78  4.882 -6.123 6.925
## 3 1949     3  48.22  8.025 -3.279 8.426
## 4 1949     4  98.86 15.229  2.510 6.285
## 5 1949     5 108.86 21.098  7.175 5.046
## 6 1949     6  23.19 27.450 13.578 5.312
clim.reg.yr <- select(clim.reg.yr, -SD) %>%
  spread(VAR, MEAN)
head(clim.reg.yr)
## Source: local data frame [6 x 5]
## 
##   YEAR   PRCP  TMAX  TMIN  TAVG
## 1 1949  912.9 16.15 4.105 6.266
## 2 1950 1075.9 14.42 2.871 6.151
## 3 1951 1281.2 14.87 3.502 6.087
## 4 1952 1168.9 15.11 3.731 6.409
## 5 1953 1329.0 16.01 4.151 5.943
## 6 1954 1291.9 14.62 3.333 6.300

Wavelet Analysis

Perform wavelet analysis on regional average annual precipitation timeseries.

wave <- wavelet_analysis(clim.reg.yr$PRCP, sig.level=0.90, noise.type='white')

par(mfrow=c(1,2))
plot(wave, plot.cb=TRUE, plot.phase=FALSE)
plot(wave$gws, wave$period, type="b",
     xlab="Global Wavelet Spectrum", ylab="Fourier Period (Years)",
     log="y", ylim=rev(range(wave$period)), 
     xlim=range(c(wave$gws, wave$gws.sig$signif)))
lines(wave$gws.sig$signif, wave$period, lty=2, col='red')

plot of chunk plot wave

Although there are some significant wavelet periods, we’ll use a simple arima model of the regional annual precipitation instead of arima models of the wavelet components.

models <- list(PRCP=auto.arima(clim.reg.yr$PRCP,
                               max.p=2,max.q=2,max.P=0,max.Q=0,
                               stationary=TRUE))
models[["PRCP"]]
## Series: clim.reg.yr$PRCP 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##         ar1  intercept
##       0.218    1183.87
## s.e.  0.126      26.69
## 
## sigma^2 estimated as 27243:  log likelihood=-404.6
## AIC=815.2   AICc=815.6   BIC=821.6

This figure shows a 20-year forecast of the arima model reflecting no low-frequency oscillations (since we are not using the wavelet decomposition).

par(mfrow=c(1,1))
plot(forecast(models[['PRCP']], h=20), main="ARIME Forecast of Regional Annual Precip",
     xlab="Timestep (yr)", ylab='Annual Precip (mm/yr)')

plot of chunk plot arima forecast

Generate 50 simulations of annual precipitation using the AR1 model of the regional annual precipitation.

sim.prcp <- weatherGen::mc_arimas(models=models, n=nrow(clim.reg.yr), n.iter=50)
str(sim.prcp)
## List of 4
##  $ x       : num [1:62, 1:50] 1063 1355 1381 1024 1460 ...
##  $ gws     : num [1:21, 1:50] 9569 14863 21023 27508 23138 ...
##  $ x.stat  : num [1:62, 1:3] 1168 1178 1188 1182 1185 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "MEAN" "Q025" "Q975"
##  $ gws.stat: num [1:21, 1:3] 12705 20034 21128 22864 24353 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "MEAN" "Q025" "Q975"

Compare the GWS power and the mean, standard deviation, and skewness statistics across these simulations (red point is the overall mean, boxplots are the distribution across simulations) to regional annual precipitation timeseries (blue point).

par(mfrow=c(2,2))
x1 <- 1
x2 <- min(length(wave$period), nrow(sim.prcp$gws))
ymin <- min(wave$period[x1:x2], wave$gws[x1:x2],
            sim.prcp$gws.stat[x1:x2, 'MEAN'],
            sim.prcp$gws.stat[x1:x2, 'Q025'])
ymax <- max(wave$period[x1:x2], wave$gws[x1:x2],
            sim.prcp$gws.stat[x1:x2, 'MEAN'],
            sim.prcp$gws.stat[x1:x2, 'Q975'])
plot(wave$period[x1:x2], wave$gws[x1:x2],
     type="n", ylim=c(ymin,ymax), xlab="Period (years)", ylab="",
     main="", log="y")
mtext(side=2, expression(paste("Power (",mm^2,")")), line=2.5)
polygon(c(wave$period[x1:x2],
          rev(wave$period[x1:x2])),
        c(sim.prcp$gws.stat[x1:x2, 'Q025'],
          rev(sim.prcp$gws.stat[x1:x2, 'Q975'])),
        col="grey")
lines(wave$period[x1:x2], wave$gws[x1:x2])
lines(wave$period[x1:x2], sim.prcp$gws.stat[x1:x2, 'MEAN'], lty=2, col="blue")
lines(wave$period[x1:x2], wave$gws.sig$signif[x1:x2], col="red", lty=3)

boxplot(colMeans(sim.prcp$x), main="Mean")
points(mean(clim.reg.yr$PRCP),col="red",pch=19)
points(mean(colMeans(sim.prcp$x)), col="blue", pch=19)

boxplot(apply(sim.prcp$x, 2, sd), main="Standard Deviation")
points(sd(clim.reg.yr$PRCP), col="red", pch=19)
points(mean(apply(sim.prcp$x, 2, sd)), col="blue", pch=19)

boxplot(apply(sim.prcp$x, 2, skewness), main="Skew")
points(skewness(clim.reg.yr$PRCP), col="red", pch=19)
points(mean(apply(sim.prcp$x, 2, skewness)),col="blue",pch=19)

plot of chunk plot gws stats

This figure shows the simulated and observed regional-average annual precipitation. The simulations were generated using the AR1 model described above.

as.data.frame(sim.prcp$x) %>%
  mutate(TIMESTEP=row_number()) %>% 
  gather(TRIAL, VALUE, -TIMESTEP) %>% 
  ggplot(aes(TIMESTEP, VALUE)) + 
  geom_line(aes(group=TRIAL), alpha=0.3) + 
  stat_summary(fun.y=mean, geom='line', color='red', size=1) +
  geom_line(aes(x=TIMESTEP, y=PRCP), data=mutate(clim.reg.yr, TIMESTEP=row_number()), color='blue', size=1) +
  labs(x="Year", y="Annual Precipitation (mm/yr)", 
       title="WARM Annual Precipitation Simulations\nRed Line=Mean of Simulations, Blue Line=Observd Regional Mean")

plot of chunk plot ar sims

Monthly Weather Generator

Select Location

First, select a random location from the complete climate dataset.

clim.locs <- select(clim.yr, LAT, LON) %>%
  unique()
loc <- clim.locs[sample(nrow(clim.locs), size=1),] %>% as.numeric
names(loc) <- c('LAT', 'LON')
loc
##    LAT    LON 
##  42.19 -72.19

Now extract the monthly and annual climate datasets for this location.

clim.loc.yr <- filter(clim.yr, LAT==loc[['LAT']], LON==loc[['LON']]) %>%
  spread(VAR, VALUE) %>%
  arrange(YEAR)
clim.loc.mon <- filter(clim.mon, LAT==loc[['LAT']], LON==loc[['LON']]) %>%
  spread(VAR, VALUE) %>%
  arrange(YEAR, MONTH)

Nearest Neighbors

Compute the euclidian distance between a single simulated annual precipitation value and each observed year. The simulated annual precipitation is for the first year in the first simulation trial.

j <- 1 # year number
samp <- 1 # simulation trial number

# compute distances between simulated WARM timeseries and observed regional precip
distances <- cbind(clim.reg.yr, DISTANCE=sqrt((sim.prcp$x[j,samp] - clim.reg.yr$PRCP)^2))

ggplot(distances, aes(YEAR, PRCP)) + 
  geom_line() +
  geom_point(aes(color=DISTANCE), size=3) +
  geom_hline(yint=sim.prcp$x[j,samp], linetype=2, color='red') +
  geom_text(x=max(distances$YEAR), y=sim.prcp$x[j,samp], label="Sim", hjust=1, vjust=-0.5) +
  scale_color_gradient(low='green', high='red') +
  labs(x="Year", y="Annual Precipitation (mm/yr)", 
       title="Regional and Current Simulation Annual Precipitation")

plot of chunk distances

Select the eight years from the observed regional annual simulation that are the most similar to the first simulation trial and year.

distances <- mutate(distances,
                    IN_TOP_8=rank(DISTANCE) %in% seq(1,8))
distances %>%
  ggplot(aes(YEAR, PRCP)) + 
  geom_line() +
  geom_point(aes(color=DISTANCE), size=3) +
  geom_hline(yint=sim.prcp$x[j,samp], linetype=2, color='red') +
  geom_point(mapping=aes(alpha=IN_TOP_8), color='black', shape=1, size=4) +
  geom_text(aes(x=YEAR, y=PRCP, label=YEAR), data=filter(distances, IN_TOP_8), vjust=-1) +
  geom_text(x=max(distances$YEAR), y=sim.prcp$x[j,samp], label="Sim", hjust=1, vjust=-0.5) +
  scale_alpha_manual(values=c("TRUE"=1, "FALSE"=0), guide=FALSE) +
  scale_color_gradient(low='green', high='red') +
  labs(x="Year", y="Annual Precipitation (mm)", title="Regional Annual Precipitation with 8 Nearest Neighbors Selected")

plot of chunk plot top distances

Extract the observed regional precipitation for the candidate years and compute the sampling weights as inverse distance-squared weighted.

distances.top <- filter(distances, IN_TOP_8) %>%
  mutate(WEIGHT=(1/DISTANCE)^2,
         WEIGHT=WEIGHT/sum(WEIGHT))
distances.top %>%
  ggplot(aes(PRCP, WEIGHT)) +
  geom_point() +
  geom_vline(xint=sim.prcp$x[j,samp], linetype=2, color='red') +
  geom_text(aes(label=YEAR), hjust=-0.1) +
  geom_text(x=sim.prcp$x[j,samp], y=0.4, label="Sim Precip", hjust=-0.1) +
  labs(x="Annual Precipitation (mm)", y="Sample Weight")

plot of chunk top distances

Now we can randomly sample from the 8 nearest neighbors using the sampling weights based on the inverse distance squared. We’ll do this 1000 times and compare the frequency of the samples to the sample weights.

sampled.years <- sample(distances.top$YEAR,
                        size=1000,
                        prob=distances.top$WEIGHT,
                        replace=TRUE)
sampled.years.tbl <- prop.table(table(sampled.years))
merge(distances.top, 
      data.frame(YEAR=names(sampled.years.tbl),
                 FREQ=as.vector(sampled.years.tbl)),
      all.x=TRUE) %>%
  select(YEAR, WEIGHT, FREQ) %>%
  ggplot(aes(WEIGHT, FREQ)) +
  geom_point(size=3) +
  geom_abline(linetype=2) +
  labs(x="Sample Weight", y="Sampled Frequency")

plot of chunk sample years

Now if we just sample for one year.

sampled.year <- sample(distances.top$YEAR, size=1, prob=distances.top$WEIGHT, replace=TRUE)
sampled.year
## [1] 1988

Then we can extract the observed monthly climate data for the sampled year from the observed dataset of the selected location.

filter(clim.loc.mon, YEAR==sampled.year)
##    YEAR MONTH   LAT    LON   PRCP  TMAX   TMIN TAVG
## 1  1988     1 42.19 -72.19  69.50 -0.79 -14.22 7.47
## 2  1988     2 42.19 -72.19  91.22  2.19 -10.25 7.72
## 3  1988     3 42.19 -72.19  67.57  7.65  -4.45 7.30
## 4  1988     4 42.19 -72.19  90.10 11.46   1.60 7.16
## 5  1988     5 42.19 -72.19  83.15 19.66   7.44 4.71
## 6  1988     6 42.19 -72.19  30.23 24.21  10.75 5.48
## 7  1988     7 42.19 -72.19 195.85 27.53  16.17 4.91
## 8  1988     8 42.19 -72.19  56.88 27.33  16.02 5.23
## 9  1988     9 42.19 -72.19  48.98 21.52   7.96 5.19
## 10 1988    10 42.19 -72.19  73.50 13.09   1.31 7.04
## 11 1988    11 42.19 -72.19 205.03 10.55  -1.17 8.80
## 12 1988    12 42.19 -72.19  30.42  2.41  -8.64 8.66

Generate Complete Timeseries

Given the selected location, repeat the nearest neighbor process for each of the num_sim simulations and num_year years, and store the result in a 3-dimensional array.

num_sim <- ncol(sim.prcp$x)
num_year <- nrow(sim.prcp$x) # number of composited years
num_months <- num_year*12

climate_variables <- select(clim.reg.yr, -YEAR) %>%
  names()

sampled.year <- array(NA, c(num_year, num_sim))
gen.loc.mon.arr <- array(NA, c(num_months, length(climate_variables), num_sim))
# loop through simulations (trials)
for (samp in 1:num_sim) {
    # loop through years
    for (j in 1:num_year) {
        # compute distances between simulated WARM timeseries and observed regional precip
      distances <- mutate(clim.reg.yr, 
                          DISTANCE=sqrt((sim.prcp$x[j, samp] - clim.reg.yr$PRCP)^2),
                          IN_TOP_8=rank(DISTANCE) %in% seq(1,8))
      distances.top <- filter(distances, IN_TOP_8) %>%
                       mutate(WEIGHT=(1/DISTANCE)^2,
                              WEIGHT=WEIGHT/sum(WEIGHT))
      sampled.year[j,samp] <- sample(distances.top$YEAR,
                                     size=1,
                                     prob=distances.top$WEIGHT,
                                     replace=FALSE)
      row.idx <- ((1+12*(j-1)):(12+12*(j-1)))
      gen.loc.mon.arr[row.idx, , samp] <- data.matrix(filter(clim.loc.mon, 
                                                             YEAR==sampled.year[j, samp]) %>%
                                                      select(PRCP, TMAX, TMIN, TAVG))
    }
}

Now convert the 3-dim array to a list of dataframes with each element in the list containing the simulated data for a single variable.

gen.loc.mon <- sapply(climate_variables, function(v) {
  i.var <- which(climate_variables==v)
  df <- as.data.frame(gen.loc.mon.arr[,i.var,]) %>%
    mutate(TIMESTEP=row_number(),
           SIM_YEAR=rep(seq(1, num_year), each=12),
           SIM_MONTH=rep(seq(1, 12), times=num_year),
           VAR=v) %>%
    gather(TRIAL, VALUE, -TIMESTEP, -SIM_YEAR, -SIM_MONTH, -VAR) %>%
    mutate(TRIAL=as.numeric(TRIAL))
  return(list(df))
})
str(gen.loc.mon)
## List of 4
##  $ PRCP:'data.frame':    37200 obs. of  6 variables:
##   ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VAR      : chr [1:37200] "PRCP" "PRCP" "PRCP" "PRCP" ...
##   ..$ TRIAL    : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ VALUE    : num [1:37200] 69.5 91.2 67.6 90.1 83.2 ...
##  $ TMAX:'data.frame':    37200 obs. of  6 variables:
##   ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VAR      : chr [1:37200] "TMAX" "TMAX" "TMAX" "TMAX" ...
##   ..$ TRIAL    : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ VALUE    : num [1:37200] -0.79 2.19 7.65 11.46 19.66 ...
##  $ TMIN:'data.frame':    37200 obs. of  6 variables:
##   ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VAR      : chr [1:37200] "TMIN" "TMIN" "TMIN" "TMIN" ...
##   ..$ TRIAL    : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ VALUE    : num [1:37200] -14.22 -10.25 -4.45 1.6 7.44 ...
##  $ TAVG:'data.frame':    37200 obs. of  6 variables:
##   ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VAR      : chr [1:37200] "TAVG" "TAVG" "TAVG" "TAVG" ...
##   ..$ TRIAL    : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ VALUE    : num [1:37200] 7.47 7.72 7.3 7.16 4.71 5.48 4.91 5.23 5.19 7.04 ...

Aggregate the generated timeseries to compute annual values or each variable.

gen.loc.yr <- lapply(gen.loc.mon, function(df) {
  if ('PRCP' %in% unique(df$VAR)) {
    df.yr <- group_by(df, VAR, TRIAL, SIM_YEAR) %>%
      summarise(VALUE=sum(VALUE))
  } else {
    df.yr <- group_by(df, VAR, TRIAL, SIM_YEAR) %>%
      summarise(VALUE=mean(VALUE))
  }
  df.yr <- df.yr %>%
    ungroup() %>%
    as.data.frame()
  return(df.yr)
})
str(gen.loc.yr)
## List of 4
##  $ PRCP:'data.frame':    3100 obs. of  4 variables:
##   ..$ VAR     : chr [1:3100] "PRCP" "PRCP" "PRCP" "PRCP" ...
##   ..$ TRIAL   : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VALUE   : num [1:3100] 1042 1300 1408 1065 1451 ...
##  $ TMAX:'data.frame':    3100 obs. of  4 variables:
##   ..$ VAR     : chr [1:3100] "TMAX" "TMAX" "TMAX" "TMAX" ...
##   ..$ TRIAL   : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VALUE   : num [1:3100] 13.9 15.1 14.4 14.1 14.5 ...
##  $ TMIN:'data.frame':    3100 obs. of  4 variables:
##   ..$ VAR     : chr [1:3100] "TMIN" "TMIN" "TMIN" "TMIN" ...
##   ..$ TRIAL   : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VALUE   : num [1:3100] 1.88 3.48 2.83 1.66 2.35 ...
##  $ TAVG:'data.frame':    3100 obs. of  4 variables:
##   ..$ VAR     : chr [1:3100] "TAVG" "TAVG" "TAVG" "TAVG" ...
##   ..$ TRIAL   : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ VALUE   : num [1:3100] 6.64 6.44 6.62 6.56 6.39 ...

This figure shows all the weather generator simulations compared to the observed climate dataset at the selected location.

do.call(rbind, gen.loc.yr) %>%
  mutate(VAR=factor(VAR)) %>%
  ggplot() +
    geom_line(aes(SIM_YEAR, VALUE, group=TRIAL), alpha=0.5) +
    geom_line(aes(SIM_YEAR, VALUE), 
              data=mutate(clim.reg.yr, SIM_YEAR=row_number()) %>%
                gather(VAR, VALUE, PRCP:TAVG), 
              color='red') +
    facet_wrap(~VAR, scales='free_y') +
    labs(x="Simulation Year", y="Value", title="All Generated Datasets with Observed Location Dataset ")

plot of chunk plot gen loc yr

This figure shows only the first weather generator simulation compared to the observed climate dataset at the selected location.

do.call(rbind, gen.loc.yr) %>%
  mutate(VAR=factor(VAR)) %>%
  filter(TRIAL==1) %>%
  ggplot() +
    geom_line(aes(SIM_YEAR, VALUE, group=TRIAL), alpha=0.5) +
    geom_line(aes(SIM_YEAR, VALUE), 
              data=mutate(clim.reg.yr, SIM_YEAR=row_number()) %>%
                gather(VAR, VALUE, PRCP:TAVG), 
              color='red') +
    facet_wrap(~VAR, scales='free_y') +
    labs(x="Simulation Year", y="Value", title="Single Generated Dataset with Observed Location Dataset ")

plot of chunk plot gen loc yr one

The following three figures compare the mean, standard deviation, and skewness of the annual precipitation timeseries between the weather generator simulations and the observed climate dataset at the selected location. The boxplots show the distribution of each statistic across the simulations, the red point shows the mean of the statistic for the simulations, and the blue point shows the statistic value for the observed annual timeseries.

do.call(rbind, gen.loc.yr) %>%
  mutate(VAR=factor(VAR)) %>%
  group_by(VAR, TRIAL) %>%
  summarise(MEAN=mean(VALUE)) %>%
  ggplot(aes(x=1, y=MEAN)) +
    geom_boxplot() +
    stat_summary(fun.y=mean, geom="point", color="red", size=3) +
    geom_point(aes(x=1, y=MEAN), 
               data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>% 
                      group_by(VAR) %>% 
                      summarise(MEAN=mean(VALUE)),
               color='blue', size=3) +
    facet_wrap(~VAR, scales='free_y') +
    labs(x="", y="Mean", title="Mean of Annual Precipitation for Generated (Red) and Observed (Blue)") +
    theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())

plot of chunk plot gen loc yr box mean

do.call(rbind, gen.loc.yr) %>%
  mutate(VAR=factor(VAR)) %>%
  group_by(VAR, TRIAL) %>%
  summarise(SD=sd(VALUE)) %>%
  ggplot(aes(x=1, y=SD)) +
    geom_boxplot() +
    stat_summary(fun.y=mean, geom="point", color="red", size=3) +
    geom_point(aes(x=1, y=SD), 
               data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>% 
                      group_by(VAR) %>% 
                      summarise(SD=sd(VALUE)),
               color='blue', size=3) +
    facet_wrap(~VAR, scales='free_y') +
    labs(x="", y="Standard Deviation", title="StDev of Annual Precipitation for Generated (Red) and Observed (Blue)") +
    theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())

plot of chunk plot gen loc yr box sd

do.call(rbind, gen.loc.yr) %>%
  mutate(VAR=factor(VAR)) %>%
  group_by(VAR, TRIAL) %>%
  summarise(SKEW=skewness(VALUE)) %>%
  ggplot(aes(x=1, y=SKEW)) +
    geom_boxplot() +
    stat_summary(fun.y=mean, geom="point", color="red", size=3) +
    geom_point(aes(x=1, y=SKEW), 
               data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>% 
                      group_by(VAR) %>% 
                      summarise(SKEW=skewness(VALUE)),
               color='blue', size=3) +
    facet_wrap(~VAR, scales='free_y') +
    labs(x="", y="Skewness", title="Skewness of Annual Precipitation for Generated (Red) and Observed (Blue)") +
    theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())

plot of chunk plot gen loc yr box skew

Session Info

sessionInfo()
## R version 3.1.1 (2014-07-10)
## Platform: x86_64-apple-darwin13.1.0 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] splines   grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] weatherGen_0.1   devtools_1.5     Hmisc_3.14-4     Formula_1.1-1   
##  [5] survival_2.37-7  lattice_0.20-29  mapproj_1.2-2    maps_2.3-7      
##  [9] ggmap_2.3        ggplot2_1.0.0    lubridate_1.3.3  stringr_0.6.2   
## [13] tidyr_0.1        dplyr_0.2        forecast_5.4     timeDate_3010.98
## [17] zoo_1.7-11      
## 
## loaded via a namespace (and not attached):
##  [1] assertthat_0.1      biwavelet_0.17.3    car_2.0-20         
##  [4] cluster_1.15.2      colorspace_1.2-4    digest_0.6.4       
##  [7] evaluate_0.5.5      fields_7.1          formatR_0.10       
## [10] fracdiff_1.4-2      gtable_0.1.2        htmltools_0.2.4    
## [13] httr_0.3            hydromad_0.9-20     knitr_1.6          
## [16] labeling_0.2        latticeExtra_0.6-26 magrittr_1.0.1     
## [19] MASS_7.3-33         memoise_0.2.1       munsell_0.4.2      
## [22] nnet_7.3-8          parallel_3.1.1      plyr_1.8.1         
## [25] png_0.1-7           proto_0.3-10        quadprog_1.5-5     
## [28] RColorBrewer_1.0-5  Rcpp_0.11.2         RCurl_1.95-4.1     
## [31] reshape2_1.4        RgoogleMaps_1.2.0.6 rjson_0.2.14       
## [34] RJSONIO_1.2-0.2     rmarkdown_0.2.50    scales_0.2.4       
## [37] spam_0.41-0         tools_3.1.1         tseries_0.10-32    
## [40] whisker_0.3-2       yaml_2.1.13